1   package org.apache.solr.search.facet;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.IdentityHashMap;
24  import java.util.Iterator;
25  import java.util.LinkedHashMap;
26  import java.util.List;
27  import java.util.Map;
28  
29  import org.apache.lucene.index.LeafReaderContext;
30  import org.apache.lucene.search.BooleanClause;
31  import org.apache.lucene.search.BooleanQuery;
32  import org.apache.lucene.search.MatchAllDocsQuery;
33  import org.apache.lucene.search.Query;
34  import org.apache.solr.common.SolrException;
35  import org.apache.solr.common.util.SimpleOrderedMap;
36  import org.apache.solr.handler.component.ResponseBuilder;
37  import org.apache.solr.request.SolrRequestInfo;
38  import org.apache.solr.schema.SchemaField;
39  import org.apache.solr.search.BitDocSet;
40  import org.apache.solr.search.DocIterator;
41  import org.apache.solr.search.DocSet;
42  import org.apache.solr.search.QParser;
43  import org.apache.solr.search.SolrIndexSearcher;
44  import org.apache.solr.search.SyntaxError;
45  
/**
 * Base class for facet processors. It holds the facet context, the request
 * being processed, and the accumulators used to compute per-bucket statistics.
 *
 * @param <FacetRequestT> the concrete {@link FacetRequest} type handled by this processor
 */
public class FacetProcessor<FacetRequestT extends FacetRequest>  {
  // Response container; nothing in this class assigns it (getResponse() returns null here).
  protected SimpleOrderedMap<Object> response;
  // Execution context for this facet (searcher, request, base doc set, parent context).
  protected FacetContext fcontext;
  // The facet request this processor executes.
  protected FacetRequestT freq;

  // Accumulators keyed by stat name, in insertion order; rebuilt on every createAccs() call.
  LinkedHashMap<String,SlotAcc> accMap;
  // The values of accMap as an array, iterated on the per-document collection path.
  protected SlotAcc[] accs;
  // Per-slot document counter; createAccs() only creates one if none was set beforehand.
  protected CountSlotAcc countAcc;
54  
55    FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
56      this.fcontext = fcontext;
57      this.freq = freq;
58    }
59  
60    public void process() throws IOException {
61      handleDomainChanges();
62    }
63  
64    protected void handleDomainChanges() throws IOException {
65      if (freq.domain == null) return;
66      handleFilterExclusions();
67      handleBlockJoin();
68    }
69  
70    private void handleBlockJoin() throws IOException {
71      if (!(freq.domain.toChildren || freq.domain.toParent)) return;
72  
73      // TODO: avoid query parsing per-bucket somehow...
74      String parentStr = freq.domain.parents;
75      Query parentQuery;
76      try {
77        QParser parser = QParser.getParser(parentStr, null, fcontext.req);
78        parentQuery = parser.getQuery();
79      } catch (SyntaxError err) {
80        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
81      }
82  
83      BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
84      DocSet input = fcontext.base;
85      DocSet result;
86  
87      if (freq.domain.toChildren) {
88        DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
89        result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
90      } else {
91        result = BlockJoin.toParents(input, parents, fcontext.qcontext);
92      }
93  
94      fcontext.base = result;
95    }
96  
97    private void handleFilterExclusions() throws IOException {
98      List<String> excludeTags = freq.domain.excludeTags;
99  
100     if (excludeTags == null || excludeTags.size() == 0) {
101       return;
102     }
103 
104     // TODO: somehow remove responsebuilder dependency
105     ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
106     Map tagMap = (Map) rb.req.getContext().get("tags");
107     if (tagMap == null) {
108       // no filters were tagged
109       return;
110     }
111 
112     IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
113     for (String excludeTag : excludeTags) {
114       Object olst = tagMap.get(excludeTag);
115       // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
116       if (!(olst instanceof Collection)) continue;
117       for (Object o : (Collection<?>)olst) {
118         if (!(o instanceof QParser)) continue;
119         QParser qp = (QParser)o;
120         try {
121           excludeSet.put(qp.getQuery(), Boolean.TRUE);
122         } catch (SyntaxError syntaxError) {
123           // This should not happen since we should only be retrieving a previously parsed query
124           throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
125         }
126       }
127     }
128     if (excludeSet.size() == 0) return;
129 
130     List<Query> qlist = new ArrayList<>();
131 
132     // add the base query
133     if (!excludeSet.containsKey(rb.getQuery())) {
134       qlist.add(rb.getQuery());
135     }
136 
137     // add the filters
138     if (rb.getFilters() != null) {
139       for (Query q : rb.getFilters()) {
140         if (!excludeSet.containsKey(q)) {
141           qlist.add(q);
142         }
143       }
144     }
145 
146     // now walk back up the context tree
147     // TODO: we lose parent exclusions...
148     for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
149       if (curr.filter != null) {
150         qlist.add( curr.filter );
151       }
152     }
153 
154     // recompute the base domain
155     fcontext.base = fcontext.searcher.getDocSet(qlist);
156   }
157 
158 
159   public Object getResponse() {
160     return null;
161   }
162 
163 
164   protected void createAccs(int docCount, int slotCount) throws IOException {
165     accMap = new LinkedHashMap<>();
166 
167     // allow a custom count acc to be used
168     if (countAcc == null) {
169       countAcc = new CountSlotArrAcc(fcontext, slotCount);
170       countAcc.key = "count";
171     }
172 
173     for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
174       SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
175       acc.key = entry.getKey();
176       accMap.put(acc.key, acc);
177     }
178 
179 
180     accs = new SlotAcc[accMap.size()];
181     int i=0;
182     for (SlotAcc acc : accMap.values()) {
183       accs[i++] = acc;
184     }
185   }
186 
187 
188   protected void resetStats() {
189     countAcc.reset();
190     for (SlotAcc acc : accs) {
191       acc.reset();
192     }
193   }
194 
195   protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
196     if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
197       bucket.add("count", docCount);
198       return;
199     }
200     createAccs(docCount, 1);
201     int collected = collect(docs, 0);
202     countAcc.incrementCount(0, collected);
203     assert collected == docCount;
204     addStats(bucket, 0);
205   }
206 
207 
208   protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
209 
210     // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
211     // should we check for domain-altering exclusions, or even ask the sub-facet for
212     // it's domain and then only skip it if it's 0?
213 
214     if (domain == null || domain.size() == 0 && !freq.processEmpty) {
215       return;
216     }
217 
218     for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
219       // make a new context for each sub-facet since they can change the domain
220       FacetContext subContext = fcontext.sub(filter, domain);
221       FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
222       subProcessor.process();
223       response.add( sub.getKey(), subProcessor.getResponse() );
224     }
225   }
226 
227   int collect(DocSet docs, int slot) throws IOException {
228     int count = 0;
229     SolrIndexSearcher searcher = fcontext.searcher;
230 
231     final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
232     final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
233     LeafReaderContext ctx = null;
234     int segBase = 0;
235     int segMax;
236     int adjustedMax = 0;
237     for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
238       final int doc = docsIt.nextDoc();
239       if (doc >= adjustedMax) {
240         do {
241           ctx = ctxIt.next();
242           if (ctx == null) {
243             // should be impossible
244             throw new RuntimeException("INTERNAL FACET ERROR");
245           }
246           segBase = ctx.docBase;
247           segMax = ctx.reader().maxDoc();
248           adjustedMax = segBase + segMax;
249         } while (doc >= adjustedMax);
250         assert doc >= ctx.docBase;
251         setNextReader(ctx);
252       }
253       count++;
254       collect(doc - segBase, slot);  // per-seg collectors
255     }
256     return count;
257   }
258 
259   void collect(int segDoc, int slot) throws IOException {
260     if (accs != null) {
261       for (SlotAcc acc : accs) {
262         acc.collect(segDoc, slot);
263       }
264     }
265   }
266 
267   void setNextReader(LeafReaderContext ctx) throws IOException {
268     // countAcc.setNextReader is a no-op
269     for (SlotAcc acc : accs) {
270       acc.setNextReader(ctx);
271     }
272   }
273 
274 
275   void addStats(SimpleOrderedMap<Object> target, int slotNum) throws IOException {
276     int count = countAcc.getCount(slotNum);
277     target.add("count", count);
278     if (count > 0 || freq.processEmpty) {
279       for (SlotAcc acc : accs) {
280         acc.setValues(target, slotNum);
281       }
282     }
283   }
284 
285 
286   public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
287     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
288 
289     // TODO: always collect counts or not???
290 
291     int count;
292 
293     if (result != null) {
294       count = result.size();
295     } else if (needDocSet) {
296       if (q == null) {
297         result = fcontext.base;
298         // result.incref(); // OFF-HEAP
299       } else {
300         result = fcontext.searcher.getDocSet(q, fcontext.base);
301       }
302       count = result.size();
303     } else {
304       if (q == null) {
305         count = fcontext.base.size();
306       } else {
307         count = fcontext.searcher.numDocs(q, fcontext.base);
308       }
309     }
310 
311     try {
312       processStats(bucket, result, (int) count);
313       processSubs(bucket, q, result);
314     } finally {
315       if (result != null) {
316         // result.decref(); // OFF-HEAP
317         result = null;
318       }
319     }
320   }
321 
322   public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
323     SchemaField sf = searcher.getSchema().getField(fieldName);
324     DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
325     DocSet answer = docs.andNot(hasVal);
326     // hasVal.decref(); // OFF-HEAP
327     return answer;
328   }
329 
330   public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
331     SchemaField sf = searcher.getSchema().getField(fieldName);
332     Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
333     BooleanQuery.Builder noVal = new BooleanQuery.Builder();
334     noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
335     return noVal.build();
336   }
337 
338 }